// .........................................................................
// Title: dealogic_dm.do
//
// Imports and cleans data from the Dealogic Debt Capital Markets (DCM)
// database; creates consolidated Dealogic DCM dataset
// .........................................................................

cap mkdir $tmp/dealogic

* -----------------------------------------------------------------
* Build Dealogic DCM data
* -----------------------------------------------------------------

/* Company Files */

* SIC Codes
use "$raw/dealogic/CompanySICCodes.dta", clear
drop sortnumber
gsort companyid -isprimary
by companyid: gen sortnum = _n
keep if sortnum == 1
drop isprimary sortnum
save "$tmp/dealogic/temp_sic_codes.dta", replace

* NAICS
use "$raw/dealogic/CompanyNAICSCodes.dta", clear
drop sortnumber
gsort companyid -isprimary
by companyid: gen sortnum = _n
keep if sortnum == 1
drop isprimary sortnum
save "$tmp/dealogic/temp_naics_codes.dta", replace

* Company – Master File
use "$raw/dealogic/Company.dta", clear
mmerge id using "$tmp/dealogic/temp_naics_codes.dta", umatch(companyid)
drop if _merge == 2
rename code NAICS
mmerge id using "$tmp/dealogic/temp_sic_codes.dta", umatch(companyid)
drop if _merge == 2
rename code SIC
drop _merge
keep id companyparentid immediateparentid name nationalityofbusinessisocode nationalityofincorporationisocod organisationtypeid publicstatusid sigid NAICS SIC
gen code2 = string(NAICS)
gen _code2 = substr(code2,1,2)
drop code2
gen code2 = real(_code2)
drop _code2
rename code2 NAICS2

gen code2 = string(SIC)
gen _code2 = substr(code2,1,1)
drop code2
gen code2 = real(_code2)
drop _code2
rename code2 SIC2
gen is_gov_org_type = .
replace is_gov_org_type = 1 if inlist(organisationtypeid,1,2,3)
replace is_gov_org_type = 0 if ~inlist(organisationtypeid,1,2,3) & organisationtypeid!=.

gen is_gov_NAICS = . 
replace is_gov_NAICS = 1 if NAICS2 == 92
replace is_gov_NAICS = 0 if NAICS2 != 92 & NAICS2 != .

gen is_gov_SIC = . 
replace is_gov_SIC = 1 if SIC2 == 9
replace is_gov_SIC = 0 if SIC2 != 9 & SIC2 != .
save "$tmp/dealogic/company_complete.dta", replace

/* DCM Deal Tranches - Proceeds and Value */

use "$raw/dealogic/DCMDealTranchesProceeds.dta", clear
rename value tranche_proceeds_value
qui mmerge dcmdealtranchedealid  dcmdealtranchetrancheid currencyisocode using "$raw/dealogic/DCMDealTranchesValue.dta", uname(tranche_value_)
drop _merge
bys dcmdealtranchedealid dcmdealtranchetrancheid: gen N = _N
mmerge dcmdealtranchedealid dcmdealtranchetrancheid using "$raw/dealogic/DCMDealTranches.dta", umatch(dcmdealdealid trancheid) ukeep(currencyisocode) urename(currencyisocode currency_issued)
keep if _merge == 3
replace tranche_value_value = tranche_proceeds_value if tranche_value_value == . & tranche_proceeds_value !=.
gen issuance_in_local_currency = tranche_value_value if currency_issued == currencyisocode
bysort dcmdealtranchedealid   dcmdealtranchetrancheid (issuance_in_local_currency): replace issuance_in_local_currency = issuance_in_local_currency[1]
gen conversion_rate_at_issuance = issuance_in_local_currency/tranche_value_value
drop _merge
keep if currencyisocode == "USD"

/*  DCM Deal Tranches – ISINs */
mmerge dcmdealtranchedealid   dcmdealtranchetrancheid using "$raw/dealogic/DCMDealTranchesISINs.dta"
keep if _merge != 2
bys dcmdealtranchedealid dcmdealtranchetrancheid: gen _dup = _N
drop _merge sortnumber N

/* DCM Deal Tranches - Issue Characteristics */
mmerge dcmdealtranchedealid   dcmdealtranchetrancheid using "$raw/dealogic/DCMDealTranchesIssueCharacteristics.dta"
keep if _merge != 2
drop _merge _dup sortnumber
save "$tmp/dealogic/aux_file.dta", replace

/* DCM Deal – Master File */
use "$raw/dealogic/DCMDeal.dta", clear
foreach var of varlist _all {
     capture assert mi(`var')
     if !_rc {
        drop `var'
     }
}
mmerge issuerid using "$tmp/dealogic/company_complete.dta", umatch(id)
keep if _merge != 2
drop _merge
save "$tmp/dealogic/dcmdealnationality.dta", replace

/* DCM Deal Tranches – Master File */
use "$raw/dealogic/DCMDealTranches.dta", clear
drop isin currencyisocode
mmerge dcmdealdealid trancheid using "$tmp/dealogic/aux_file.dta", umatch(dcmdealtranchedealid dcmdealtranchetrancheid)
mmerge dcmdealdealid using "$tmp/dealogic/dcmdealnationality.dta", umatch(dealid)
keep if _merge == 3
replace pricingdate = substr(pricingdate,1,10)
replace announcementdate = substr(announcementdate,1,10)
replace settlementdate = substr(settlementdate,1,10)
replace maturitydate = substr(maturitydate,1,10)

gen _pricingdate = date(pricingdate,"YMD")
gen _announcementdate = date(announcementdate,"YMD")
gen _settlementdate = date(settlementdate,"YMD")
gen _maturitydate = date(maturitydate,"YMD")

gen mdate = mofd(_pricingdate)
format mdate %tm 
drop pricingdate
rename mdate pricingdate

gen mdate = mofd(_announcementdate)
format mdate %tm 
drop announcementdate 
rename mdate announcementdate

gen mdate = mofd(_settlementdate)
format mdate %tm 
drop settlementdate
rename mdate settlementdate

gen mdate = mofd(_maturitydate)
format mdate %tm 
drop maturitydate
rename mdate maturitydate
format %td _announcementdate _pricingdate _settlementdate _maturitydate
bys dcmdealdealid trancheid: gen n = _n
gen value = tranche_value_value/1000000 if n == 1
replace value = 0 if n > 1
duplicates drop
save "$tmp/dealogic/tranches_complete.dta", replace

* -----------------------------------------------------------------
* Consolidated issuance file
* -----------------------------------------------------------------

* ISIN to CUSIP mapping
use isin cusip using "$raw/cmns/gcap_security_master_isin.dta", clear
drop if missing(isin) | missing(cusip)
save "$tmp/isin_to_cusip", replace

* CUSIP to ISIN mapping
use cusip isin using "$raw/cmns/gcap_security_master_isin.dta", clear
drop if missing(isin) | missing(cusip)
save "$tmp/cusip_to_isin", replace

/* Preparing Dealogic DCS File */
cap restore
use "$tmp/dealogic/tranches_complete.dta", clear
bys dcmdealdealid trancheid: gen nvalues = _n
rename isin0 isin
preserve
keep dcmdealdealid trancheid isin cusip
duplicates drop
mmerge isin using "$tmp/isin_to_cusip", umatch(isin) uname(master_)
drop if _merge == 2
duplicates drop
replace cusip = master_cusip if cusip == "" & master_cusip != ""
drop _merge master*
duplicates drop
gsort dcmdealdealid trancheid -cusip
by dcmdealdealid trancheid: gen nvalues = _n
by dcmdealdealid trancheid: gen Nvalues = _N
by dcmdealdealid trancheid: replace cusip = cusip[1] if cusip == ""
drop if nvalues > 1 & cusip == ""
drop nvalues Nvalues
tempfile cusip
save `cusip'

restore
preserve
keep dcmdealdealid trancheid isin cusip
duplicates drop
qui mmerge cusip using "$tmp/cusip_to_isin", umatch(cusip) uname(master_)
drop if _merge == 2
duplicates drop
replace isin = master_isin if isin == "" & master_isin != ""
drop _merge master*
duplicates drop
gsort dcmdealdealid trancheid -isin
tempfile isin
save `isin'
restore

mmerge dcmdealdealid trancheid using `cusip', umatch(dcmdealdealid trancheid) uname(master_) ukeep(cusip)
replace cusip = master_cusip if cusip == "" & master_cusip != ""
drop master*

mmerge dcmdealdealid trancheid using `isin', umatch(dcmdealdealid trancheid) uname(master_) ukeep(isin)
replace isin = master_isin if isin == "" & master_isin != ""
drop nvalues _merge master_isin

duplicates drop
order cusip isin, after(trancheid)
gen cusip6 = substr(cusip,1,6)
duplicates drop dcmdealdealid trancheid   cusip isin, force
order cusip6, after(cusip)

gsort dcmdealdealid trancheid -cusip6
by dcmdealdealid trancheid: gen nvalues = _n
by dcmdealdealid trancheid: gen Nvalues = _N

rename tranche_face_value value
replace value = value /1000000
drop nvalues Nvalues
sort dcmdealdealid trancheid

save "$tmp/dealogic_dcm_issuance_complete.dta", replace
keep dcmdealdealid trancheid cusip cusip6 isin value settlementdate maturitydate
save "$tmp/dealogic_dcm_issuance_cleaned.dta", replace
